library(tidyverse)
library(googlesheets4)
library(plotly)
library(ggplot2)
library(ggridges)
library(DT)

Data! Thanks Collier

Setting up each country to have a color

CountryColors = tibble(
  Country = c("USA","France","Italy","Israel","Spain","Chile" )
  ,color = c("rgba(255,255,255,1)","rgba(255,0,255,1)","rgba(0,255,255,1)"
             ,"rgba(255,255,0,1)","rgba(255,0,0,1)","rgba(0,0,255,1)")
)

Cleaning the data:

# head(WineData)

# Remove empty rows at the bottom
WineData = WineData[is.na(WineData$Name)==FALSE,]

# Add average to the no vintage wine
MeanVintage = mean(na.omit(as.numeric(WineData$Vintage)))
## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion

## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion
# clean data: split country out, add vintage as all numeric
WineData = WineData %>%
  mutate(
    CountryClean = str_split(Country, ", ") # split region and country into list
    ,Vintage = as.vector(Vintage) #clean vintage from blended vector (dbl & chr)
    ,VintageClean = as.numeric(Vintage) # set data to numeric to call out NAs
  ) %>%
  unnest_wider(CountryClean) %>% # split region and country list into two columns
  rename("Region" = ...1 
         ,"CountryClean" = ...2) %>% #rename columns from unnest
  rowwise() %>%
  mutate(
    VintageClean = ifelse(is.na(VintageClean) == TRUE, MeanVintage, Vintage) #set unknown vintage to average
    ,CountryColor = CountryColors$color[match(CountryClean,CountryColors$Country)] #add color for plotting
    ,Size = (log(`Total Price`+1, exp(.1))+1)/2
  )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Let’s take a look at the data after getting it cleaned

DT::datatable(WineData)

Bubble Chart!

Still have a few bits to edit - Size is off - Colors are off

plot_ly(
  data = WineData
  ,x = ~VintageClean
  ,y = ~Price
  ,name = ~CountryClean
  ,color = ~CountryColor
  ,text = ~paste0("Brand: ", `Grower/bottler`
                  ,"</br></br>Name: ", Name
                  ,"</br>Country: ", CountryClean
                  ,"</br>Vintage: ", Vintage
                  ,"</br>Price: ", Price
                  ,"</br>Bottles: ", `Bottles remaining`
                  ,"</br>Total Value: ", `Total Price`
                  )
  ,hoverinfo = 'text'
  ,type = "scatter"
  ,mode = "markers"
  ,marker = list(
    size = ~Size
    ,opacity = .7
  )
)

Rdigeline histogram chart!

Fun way to compare the bottle prices by country

WineData %>%
  mutate(text = fct_reorder(CountryClean,Price)) %>%
  ggplot(
    aes(y = text, x = Price, fill = text)
    ) +
    geom_density_ridges(alpha = .6, stat = "binline", bins = 20) +
    ggridges::theme_ridges() +
    theme(
      legend.position="none",
      panel.spacing = unit(6, "lines"),
      strip.text.x = element_text(size = 8)
    ) +
    xlab("Price ($)") +
    ylab("")